1. Opis zestawu danych

  2. Wczytanie danych

  3. Wczytanie danych z pliku i nazwanie kolumn.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.5     ✓ dplyr   1.0.3
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
drugs <- read.delim("drug_consumption.data", header = FALSE, sep = ",", fill = TRUE)
  
#str(data)
colnames(drugs) <- c(
  "id", #1
  "age", #2
  "gender", #3
  "education", #4
  "country", #5
  "ethnicity", #6
  "nscore", #7
  "escore", #8
  "oscore", #9
  "ascore", #10
  "cscore", #11
  "impulsive", #12
  "sensationSeeing", #13
  "alcohol", #14
  "amphetamine", #15
  "amyl", #16
  "benzos", #17
  "caffeine", #18
  "cannabis", #19
  "chocolate", #20
  "cocaine", #21
  "crack", #22
  "ecstasy", #23
  "heroin", #24
  "ketamine", #25
  "legalHighs", #26
  "lsd", #27
  "meth", #28
  "mushrooms", #29
  "nicotine", #30
  "semer", #31
  "vsa" #32
) 
  1. Mapowanie danych jakościowych - zamiana wartości liczbowych na odpowiadające im kategorie.
  1. mapowanie bezpośrednie
# cast values to characters to avoid negative indexing
drugs[ ,c(2:11)] <- lapply(drugs[ ,c(2:11)], as.character)

#usage
usage <- c(
  "Never Used",
  "Used over a Decade Ago", 
  "Used in Last Decade",
  "Used in Last Year",
  "Used in Last Month",
  "Used in Last Week",
  "Used in Last Day"
)
names(usage) <- c("CL0", "CL1", "CL2", "CL3", "CL4", "CL5", "CL6")
usage
##                      CL0                      CL1                      CL2 
##             "Never Used" "Used over a Decade Ago"    "Used in Last Decade" 
##                      CL3                      CL4                      CL5 
##      "Used in Last Year"     "Used in Last Month"      "Used in Last Week" 
##                      CL6 
##       "Used in Last Day"
for (i in 14:32) {
  tmp <- usage[drugs[ ,i]]
  drugs[ ,i] <- as.factor(tmp)
  levels(drugs[ ,i]) <- usage
}

# age
ageLevels <- c("18-24","25-34","35-44","45-54","55-64","65+")
names(ageLevels) <- c(
  -0.95197, # 18-24
  -0.07854, # 25-34
  0.49788,  # 35-44
  1.09449,  # 45-45
  1.82213,  # 55-64
  2.59171   # 65+
)  
tmp <- ageLevels[drugs$age]
drugs$age <- as.factor(tmp)
levels(drugs$age) <- ageLevels

# gender
genderLevels <- c("M", "F")
names(genderLevels) <- c(-0.48246, 0.48246)
tmp <- genderLevels[drugs$gender]
drugs$gender <- as.factor(tmp)

#education
educationLevels <- c(
  "Left school before 16 years",
  "Left school at 16 years",
  "Left school at 17 years",
  "Left school at 18 years",
  "Some college, no degree",
  "Professional certificate",
  "University degree",
  "Masters degree",
  "Doctorate degree"
)
names(educationLevels) <- c(
  -2.43591, # Left school before 16 years
  -1.73790, # Left school at 16 years
  -1.43719, # Left school at 17 years
  -1.22751, # Left school at 18 years
  -0.61113, # Some college, no degree
  -0.05921, # Professional certificate
  0.45468,  # University degree
  1.16365,  # Masters degree  
  1.98437   # Doctorate degree
)
tmp <- educationLevels[drugs$education]
drugs$education <- as.factor(tmp)
levels(drugs$education) <- educationLevels

#country
countryLevels <- c("Australia", "Canada", "New Zealand", "Other",
                   "Ireland", "UK", "USA")
names(countryLevels) <- c(
  -0.09765, # Australia
  0.24923,  # Canada
  -0.46841, # New Zealand
  -0.28519, # Other
  0.21128,  # Ireland
  0.96082,  # UK
  -0.57009  # USA
)
tmp <- countryLevels[drugs$country]
drugs$country <- as.factor(tmp)

#ethnicity
ethnicityLevels <- c("Asian", "Black", "Mixed-Black/Asian", "Mixed-White/Asian",
                     "Mixed-White/Black", "Other", "White")
names(ethnicityLevels) <- c(
  -0.50212, # Asian
  -1.10702, # Black
  1.90725,  # Mixed-Black/Asian
  0.12600,  # Mixed-White/Asian
  -0.22166, # Mixed-White/Black
  0.11440,  # Other
  -0.31685  # White
)
tmp <- ethnicityLevels[drugs$ethnicity]
drugs$ethnicity <- as.factor(tmp)
  1. mapowanie poprzez manipulację danymi z wykorzystaniem Tidyverse
# NEO PI-R score levels
nscoreLevels <- c(12:60)
escoreLevels <- c(16,18:59)
oscoreLevels <- c(24,26,28:60)
ascoreLevels <- c(12,16,18,23:60)
cscoreLevels <- c(17,19:57,59)

#nscores
scores <- drugs %>%
  distinct(nscore) %>%
  arrange(nscore) %>%
  pull(nscore)

names(nscoreLevels) <- scores
tmp <- nscoreLevels[drugs$nscore]
drugs$nscore <- as.factor(tmp)

#escores
scores <- drugs %>%
  distinct(escore) %>%
  arrange(escore) %>%
  pull(escore)

names(escoreLevels) <- scores
tmp <- escoreLevels[drugs$escore]
drugs$escore <- as.factor(tmp)

#oscores
scores <- drugs %>%
  distinct(oscore) %>%
  arrange(oscore) %>%
  pull(oscore)

names(oscoreLevels) <- scores
tmp <- oscoreLevels[drugs$oscore]
drugs$oscore <- as.factor(tmp)

#ascores
scores <- drugs %>%
  distinct(ascore) %>%
  arrange(ascore) %>%
  pull(ascore)

names(ascoreLevels) <- scores
tmp <- ascoreLevels[drugs$ascore]
drugs$ascore <- as.factor(tmp)

#cscores
scores <- drugs %>%
  distinct(cscore) %>%
  arrange(cscore) %>%
  pull(cscore)

names(cscoreLevels) <- scores
tmp <- cscoreLevels[drugs$cscore]
drugs$cscore <- as.factor(tmp)

# come back to numeric values to allow quantitative analysis
drugs[ ,c(7:11)] <- lapply(drugs[ ,c(7:11)], as.numeric)
  1. Sprawdzenie występowania braków danych - nie stwierdzono błędnych danych.
if (sum(is.na(drugs)) > 0) {
  drop_na(drugs)
} else {
  print("No missing values")
}
## [1] "No missing values"
  1. Podsumowanie danych
summary(drugs)
##        id            age      gender                     education  
##  Min.   :   1.0   18-24:643   F:942   Masters degree          :506  
##  1st Qu.: 474.0   25-34:481   M:943   Doctorate degree        :480  
##  Median : 946.0   35-44:356           Professional certificate:283  
##  Mean   : 945.3   45-54:294           University degree       :270  
##  3rd Qu.:1417.0   55-64: 93           Left school at 18 years :100  
##  Max.   :1888.0   65+  : 18           Left school at 16 years : 99  
##                                       (Other)                 :147  
##         country                 ethnicity        nscore          escore     
##  Australia  :  54   Asian            :  26   Min.   : 1.00   Min.   : 1.00  
##  Canada     :  87   Black            :  33   1st Qu.: 7.00   1st Qu.: 6.00  
##  Ireland    :  20   Mixed-Black/Asian:   3   Median :25.00   Median :24.00  
##  New Zealand:   5   Mixed-White/Asian:  20   Mean   :20.09   Mean   :18.14  
##  Other      : 118   Mixed-White/Black:  20   3rd Qu.:31.00   3rd Qu.:28.00  
##  UK         :1044   Other            :  63   Max.   :49.00   Max.   :42.00  
##  USA        : 557   White            :1720                                  
##      oscore          ascore          cscore     impulsive        
##  Min.   : 1.00   Min.   : 1.00   Min.   : 1   Min.   :-2.555240  
##  1st Qu.: 5.00   1st Qu.: 5.00   1st Qu.: 6   1st Qu.:-0.711260  
##  Median :14.00   Median :15.00   Median :16   Median :-0.217120  
##  Mean   :15.73   Mean   :17.18   Mean   :18   Mean   : 0.007216  
##  3rd Qu.:26.00   3rd Qu.:29.00   3rd Qu.:29   3rd Qu.: 0.529750  
##  Max.   :35.00   Max.   :41.00   Max.   :41   Max.   : 2.901610  
##                                                                  
##  sensationSeeing                       alcohol                    amphetamine 
##  Min.   :-2.078480   Never Used            : 34   Never Used            :976  
##  1st Qu.:-0.525930   Used over a Decade Ago:505   Used over a Decade Ago:102  
##  Median : 0.079870   Used in Last Decade   : 68   Used in Last Decade   :243  
##  Mean   :-0.003292   Used in Last Year     :287   Used in Last Year     : 75  
##  3rd Qu.: 0.765400   Used in Last Month    :759   Used in Last Month    : 61  
##  Max.   : 1.921730   Used in Last Week     :198   Used in Last Week     :198  
##                      Used in Last Day      : 34   Used in Last Day      :230  
##                      amyl                         benzos    
##  Never Used            :1305   Never Used            :1000  
##  Used over a Decade Ago:   3   Used over a Decade Ago:  95  
##  Used in Last Decade   : 237   Used in Last Decade   : 234  
##  Used in Last Year     :  24   Used in Last Year     : 120  
##  Used in Last Month    :  14   Used in Last Month    :  84  
##  Used in Last Week     :  92   Used in Last Week     : 236  
##  Used in Last Day      : 210   Used in Last Day      : 116  
##                    caffeine                      cannabis  
##  Never Used            :  27   Never Used            :413  
##  Used over a Decade Ago:1385   Used over a Decade Ago:463  
##  Used in Last Decade   :  24   Used in Last Decade   :266  
##  Used in Last Year     : 106   Used in Last Year     :140  
##  Used in Last Month    : 273   Used in Last Month    :185  
##  Used in Last Week     :  60   Used in Last Week     :211  
##  Used in Last Day      :  10   Used in Last Day      :207  
##                   chocolate                     cocaine    
##  Never Used            : 32   Never Used            :1038  
##  Used over a Decade Ago:807   Used over a Decade Ago:  19  
##  Used in Last Decade   : 10   Used in Last Decade   : 270  
##  Used in Last Year     :296   Used in Last Year     :  99  
##  Used in Last Month    :683   Used in Last Month    :  41  
##  Used in Last Week     : 54   Used in Last Week     : 258  
##  Used in Last Day      :  3   Used in Last Day      : 160  
##                     crack                        ecstasy    
##  Never Used            :1627   Never Used            :1021  
##  Used over a Decade Ago:   2   Used over a Decade Ago:  21  
##  Used in Last Decade   : 112   Used in Last Decade   : 234  
##  Used in Last Year     :   9   Used in Last Year     : 156  
##  Used in Last Month    :   9   Used in Last Month    :  63  
##  Used in Last Week     :  59   Used in Last Week     : 277  
##  Used in Last Day      :  67   Used in Last Day      : 113  
##                     heroin                       ketamine   
##  Never Used            :1605   Never Used            :1490  
##  Used over a Decade Ago:  13   Used over a Decade Ago:   4  
##  Used in Last Decade   :  94   Used in Last Decade   : 142  
##  Used in Last Year     :  24   Used in Last Year     :  42  
##  Used in Last Month    :  16   Used in Last Month    :  33  
##  Used in Last Week     :  65   Used in Last Week     : 129  
##  Used in Last Day      :  68   Used in Last Day      :  45  
##                   legalHighs                       lsd      
##  Never Used            :1094   Never Used            :1069  
##  Used over a Decade Ago:  67   Used over a Decade Ago:  13  
##  Used in Last Decade   : 198   Used in Last Decade   : 177  
##  Used in Last Year     : 110   Used in Last Year     :  97  
##  Used in Last Month    :  64   Used in Last Month    :  56  
##  Used in Last Week     : 323   Used in Last Week     : 214  
##  Used in Last Day      :  29   Used in Last Day      : 259  
##                      meth                       mushrooms  
##  Never Used            :1429   Never Used            :982  
##  Used over a Decade Ago:  73   Used over a Decade Ago:  4  
##  Used in Last Decade   :  97   Used in Last Decade   :260  
##  Used in Last Year     :  50   Used in Last Year     :115  
##  Used in Last Month    :  48   Used in Last Month    : 40  
##  Used in Last Week     : 149   Used in Last Week     :275  
##  Used in Last Day      :  39   Used in Last Day      :209  
##                    nicotine                      semer     
##  Never Used            :428   Never Used            :1877  
##  Used over a Decade Ago:610   Used over a Decade Ago:   3  
##  Used in Last Decade   :204   Used in Last Decade   :   1  
##  Used in Last Year     :108   Used in Last Year     :   2  
##  Used in Last Month    :157   Used in Last Month    :   2  
##  Used in Last Week     :185   Used in Last Week     :   0  
##  Used in Last Day      :193   Used in Last Day      :   0  
##                      vsa      
##  Never Used            :1455  
##  Used over a Decade Ago:   7  
##  Used in Last Decade   : 135  
##  Used in Last Year     :  13  
##  Used in Last Month    :  14  
##  Used in Last Week     :  61  
##  Used in Last Day      : 200
  1. Zapis danych do pliku
write.csv(drugs, "Żarnowski_dane_przekształcone.csv", row.names = FALSE)
saveRDS(drugs, "drug_consumption_data.Rda")
  1. WIZUALIZACJE

  2. Ostatnie użycie wybranych substancji uzależniających z uwzględnieniem płci i wieku (ggplot2).

library(ggplot2)
library(grid)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
plot1 <- ggplot(data = drugs, aes(x = cannabis, fill = age)) +
  geom_bar() +
  theme_bw() + 
  facet_wrap(~ gender) +
  ggtitle("cannabis") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        legend.position = "none") +
  coord_flip()

plot2 <- ggplot(data = drugs, aes(x = amphetamine, fill = age)) +
  geom_bar() +
  theme_bw() + 
  facet_wrap(~ gender) +
  ggtitle("amphetamine") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y = element_blank(),
        legend.position = "right") +
  coord_flip()

plot3 <- ggplot(data = drugs, aes(x = nicotine, fill = age)) +
  geom_bar() +
  theme_bw() + 
  facet_wrap(~ gender) +
  ggtitle("nicotine") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title.x=element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.y=element_blank(),
        legend.position = "none") +
  coord_flip()

plot4 <- ggplot(data = drugs, aes(x = ecstasy, fill = age)) +
  geom_bar() +
  theme_bw() + 
  facet_wrap(~ gender) +
  ggtitle("ecstasy") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y=element_blank()) +
  coord_flip()

grid.arrange(plot1,plot2,plot3,plot4, ncol = 2, nrow = 2,
             top = "Some drugs usage broken down by gender and age",
             left = "Usage",
             bottom = "Number of respondens")

  1. Badanie wpływu aktywnego używania wybranych narkotyków na zmianę profilu osobowości względem osoby nieużywającej danej substancji (graphics).

  2. Funkcje filtrujące dane dla użykowników i abstynentów danej substancji.

users <- function(substance) {
  drugs %>% 
    filter(
      substance == "Used in Last Day" 
      | substance == "Used in Last Week"
      | substance == "Used in Last Month"
      | substance == "Used in Last Year"
    )
}

nonusers <- function(substance) {
  drugs %>% 
    filter(
      substance == "Never Used" 
      | substance == "Used over a Decade Ago"
      | substance == "Used in Last Decade"
    )
}
  1. Funkcja obliczająca średnie wskaźniki profilów osobowości dla danej substancji, z podziałem na użytkowników i abstynentów.
neoac <- function(substance, active) {
  if(active) {
    df = users(substance)
  } else {
    df = nonusers(substance)
  }
  
  var <- df %>%
    summarise(
      n = mean(nscore),
      e = mean(escore),
      o = mean(oscore),
      a = mean(ascore),
      c = mean(cscore))
  
  var <- as.numeric(var[1, ])
  names(var) <- c("n", "e", "o", "a", "c")
  return(var)
}
  1. Wykresy porównujące średnie profile osobowości dla czterech wybranych narkotyków.
library(graphics)
par(mfrow=c(2,2))

# cocaine
cocaine_yes <- neoac(drugs$cocaine, T)
cocaine_no <- neoac(drugs$cocaine, F)

scores_range <- range(12, cocaine_yes, cocaine_no, 25)
plot(cocaine_yes,
     type="b",
     col="red",
     pch = 19,
     ylim=scores_range,
     axes=FALSE,
     ann=FALSE)
axis(1, at=1:5, lab=c("n", "e", "o", "a", "c"))
axis(2, las=1, at=12:scores_range[2])
lines(cocaine_no, type="b", pch=15, lty=2, col="blue")
title(main = "Cocaine",
      xlab = "NEO PI-R personality dimensions",
      ylab = "Average score")
legend(2.5, scores_range[2], c("non-user","user"), cex=0.7, 
       col=c("blue","red"), pch=15:19, lty=2:1)
box()

# ecstasy
ecstasy_yes <- neoac(drugs$ecstasy, T)
ecstasy_no <- neoac(drugs$ecstasy, F)

scores_range <- range(12, ecstasy_yes, ecstasy_no, 25)
plot(ecstasy_yes,
     type="b",
     col="red",
     pch = 19,
     ylim=scores_range,
     axes=FALSE,
     ann=FALSE)
axis(1, at=1:5, lab=c("n", "e", "o", "a", "c"))
axis(2, las=1, at=12:scores_range[2])
lines(ecstasy_no, type="b", pch=15, lty=2, col="blue")
title(main = "Ecstasy",
      xlab = "NEO PI-R personality dimensions",
      ylab = "Average score")
legend(2.5, scores_range[2], c("non-user","user"), cex=0.7, 
       col=c("blue","red"), pch=15:19, lty=2:1)
box()

# meth
meth_yes <- neoac(drugs$meth, T)
meth_no <- neoac(drugs$meth, F)

scores_range <- range(12, meth_yes, meth_no, 25)
plot(meth_yes,
     type="b",
     col="red",
     pch = 19,
     ylim=scores_range,
     axes=FALSE,
     ann=FALSE)
axis(1, at=1:5, lab=c("n", "e", "o", "a", "c"))
axis(2, las=1, at=12:scores_range[2])
lines(meth_no, type="b", pch=15, lty=2, col="blue")
title(main = "Meth",
      xlab = "NEO PI-R personality dimensions",
      ylab = "Average score")
legend(2.5, scores_range[2], c("non-user","user"), cex=0.7, 
       col=c("blue","red"), pch=15:19, lty=2:1)
box()

# crack
crack_yes <- neoac(drugs$meth, T)
crack_no <- neoac(drugs$meth, F)

scores_range <- range(12, crack_yes, crack_no, 25)
plot(crack_yes,
     type="b",
     col="red",
     pch = 19,
     ylim=scores_range,
     axes=FALSE,
     ann=FALSE)
axis(1, at=1:5, lab=c("n", "e", "o", "a", "c"))
axis(2, las=1, at=12:scores_range[2])
lines(crack_no, type="b", pch=15, lty=2, col="blue")
title(main = "Crack",
      xlab = "NEO PI-R personality dimensions",
      ylab = "Average score")
legend(2.5, scores_range[2], c("non-user","user"), cex=0.7, 
       col=c("blue","red"), pch=15:19, lty=2:1)
box()

mtext("Average personality profiles for users and non-users of some drugs",
      side = 3, line = -1.2, outer = TRUE)

par(mfrow=c(1,1))
  1. Badanie zależności pomiędzy używaniem substancji uzależniających w młodości a ryzykiem aktywnego używania obecnie dla osób w wieku 18-34 (lattice).

  2. Przefiltrowanie danych dla wybranych substancji uzależniających i osób w wieku 18-34.

sample_drugs <- c(21:29)
youth <- drugs %>%
  filter(age == "18-24" | age == "25-34") %>%
  select(all_of(sample_drugs))
  1. Obliczenie liczby badanych dla poszczególnych terminów ostatniego użycia substancji.
df <- count(youth, cocaine)
df <- subset(df, select = n)
rownames(df) <- usage
df <- rename(df, cocaine = n )
df <- mutate(df, crack = count(youth, crack)$n)
df <- mutate(df, ecstasy = count(youth, ecstasy)$n)
df <- mutate(df, heroin = count(youth, heroin)$n)
df <- mutate(df, ketamine = count(youth, ketamine)$n)
df <- mutate(df, legalHighs = count(youth, legalHighs)$n)
df <- mutate(df, lsd = count(youth, lsd)$n)
df <- mutate(df, meth = count(youth, meth)$n)
df <- mutate(df, mushrooms = count(youth, mushrooms)$n)
  1. Kategoryzacja terminów ostatniego użycia:
users <- df %>%
  slice(4:7) %>%
  summarise_all(sum)
past_users <- df %>%
  slice(2:3) %>%
  summarise_all(sum)
non_users <- df[1, ]

df <- bind_rows(users, past_users, df[1, ])
df <- as.data.frame(t(df))

df <- add_column(df, .before = 1, drugs = colnames(drugs[,21:29]))
rownames(df) <- c(1:9)
colnames(df) <- c("Substance", "ActiveUsers", "PastUsers", "NonUsers")
  1. Obliczenie procentu osób które pozostają aktywnymi użytkownikami danej substancji, jeśli spróbowali jej w przeszłości.
df <- mutate(df, EverUsed = ActiveUsers + PastUsers)
df <- mutate(df, activeUsersRatio = ActiveUsers/EverUsed)
df$activeUsersRatio <- round(df$activeUsersRatio, digits = 2)
df[ ,c(1,6)]
  1. Wykonanie wykresu zależności między spróbowaniem substancji w przeszłości a aktywnym użytkowaniem obecnie.
library(lattice)
library(latticeExtra)
## 
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
## 
##     layer
xyplot(ActiveUsers ~ EverUsed, data = df,
       main = "Correlation between early use of addictive substance in youth 
       \nand currently active use",
       xlab = "Respondents who tried drugs in the past",
       ylab = "Active users",
       col = "chocolate",
       pch = 19,
       fontsize = 15,
       panel = function(...) {
         panel.fill(col = "gray4")
         panel.xyplot(...)
       })

  1. Porównanie liczby aktywnych użytkowników w wieku 18-34 z liczbą abstynentów dla wybranych substancji (plotly).
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
plot_ly(data = df, 
        x = ~Substance,
        y = ~ActiveUsers,
        type = "bar",
        name = "Active Users",
        marker = list(color = 'rgb(50,125,190)')) %>%
  add_trace(y = ~PastUsers, name = "Past Users", marker = list(color = 'rgb(200,200,200)')) %>%
  layout(yaxis = list(title = "Users"))
  1. Korelacja pomiędzy liczbą kontaktów z nielegalnymi substancjami uzależniającymi a poziomem edukacji (ggplot2).
illegal <- c(15,19,21:25,27:29,31)
illegal_count <- rowSums(drugs[ ,illegal] != "Never Used", na.rm = TRUE)

drugs <- mutate(drugs, illegal_active = illegal_count)
ggplot(data = drugs, aes(x = illegal_active, y = education)) + 
  geom_count(aes(size = ..n..), color='royalblue3', shape = 15) +
  ggtitle("Correlation between illegal drugs usage and education") +
  labs(x = "Number of illegal drugs ever used", y = "Education level") +
  scale_size("Group", range = c(1,8))